package com.yidu.web.utils;

import com.yidu.web.dao.WeatherDao;
import com.yidu.web.model.WsWeather;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Crawls weather forecasts from qq.ip138.com and stores them through {@link WeatherDao}.
 *
 * <p>The crawl walks three levels of pages: the province index, the city list of a
 * province, and the forecast page of a single city. Every page is fetched with Jsoup
 * and the links/rows are read from the elements matched by {@code .table .t12}.
 *
 * <p>All state is held in a static, unsynchronized list, so only one crawl should run
 * at a time.
 */
public class SpiderTools {

    // URL of the province index page; the entry point of a crawl
    public static final String urlPro = "https://qq.ip138.com/weather";

    // Site root, prepended to the href values read from the crawled pages
    public static final String urlRoot = "https://qq.ip138.com";

    // Records collected by the current crawl (a plain ArrayList, no synchronization)
    public static List<WsWeather> weatherList = new ArrayList<>();

    // Names whose link on the province index is crawled directly as a forecast page
    private static final String[] DIRECT_CITIES = {"北京", "天津", "重庆", "上海", "澳门", "香港"};

    // Only this province is expanded into cities; the original author limited the
    // crawl for testing because crawling everything made the site answer with 502
    private static final String TARGET_PROVINCE = "湖南";

    // Link text containing this marker is skipped on the province index
    private static final String SKIP_MARKER = "旅游景点";

    // Pause between two city requests, to slow the crawl down and avoid being blocked
    private static final long REQUEST_PAUSE_MILLIS = 500L;

    /**
     * Crawls the province index page and saves everything collected to the database.
     *
     * <p>Each run starts from an empty {@link #weatherList}, so calling this method
     * more than once does not insert the records of an earlier run again. Nothing is
     * saved when the index page cannot be loaded or when no record was collected.
     *
     * @param url absolute URL of the province index page, e.g. {@link #urlPro}
     */
    public static void getProvince(String url){
        // Start clean: the list is static and would otherwise keep earlier results
        weatherList.clear();

        try {
            for (Element link : fetchLinks(url)) {
                String href = link.attr("href");
                String name = link.text(); // province or city name
                System.out.println(name);

                if (name.isEmpty() || href.isEmpty() || name.contains(SKIP_MARKER)) {
                    continue;
                }

                // Exact comparison: a substring test would also accept "", "南", " " ...
                if (isDirectCity(name)) {
                    WsWeather ws = new WsWeather();
                    ws.setProvince(name);
                    ws.setCity(name);
                    getWeather(href, ws);
                } else if (TARGET_PROVINCE.equals(name)) {
                    getCity(href, name);
                }
            }

            // Save to the database once the whole crawl has finished
            if (!weatherList.isEmpty()) {
                WeatherDao dao = new WeatherDao();
                dao.insertList(weatherList);
            }

        } catch (IOException e) {
            System.err.println("Failed to crawl province index: " + url);
            e.printStackTrace();
        }
    }

    /**
     * Crawls the city list of one province and fetches the forecast of every city
     * that has a link. This method is not recursive; it goes exactly one level down.
     *
     * <p>The method pauses between two cities. If the thread is interrupted during
     * that pause, the interrupt flag is restored and the remaining cities are skipped.
     *
     * @param url     href of the province page, appended to {@link #urlRoot}
     * @param proName province name stored on every record created here
     */
    public static void getCity(String url,String proName){
        String pageUrl = urlRoot + url;
        try {
            for (Element link : fetchLinks(pageUrl)) {
                String href = link.attr("href");
                String name = link.text(); // city name
                System.out.println(name);

                if (href.isEmpty()) {
                    continue;
                }

                WsWeather ws = new WsWeather();
                ws.setProvince(proName);
                ws.setCity(name);
                getWeather(href, ws);

                if (!pause()) {
                    return;
                }
            }
        } catch (IOException e) {
            System.err.println("Failed to crawl city list: " + pageUrl);
            e.printStackTrace();
        }
    }

    /**
     * Fetches one forecast page and copies its rows into the given record, then adds
     * the record to {@link #weatherList}. The record is not added when the page
     * cannot be loaded.
     *
     * @param href href of the forecast page, appended to {@link #urlRoot}
     * @param ws   record to fill; must not be null
     */
    private static void getWeather(String href, WsWeather ws) {
        String pageUrl = urlRoot + href;
        try {
            Document document = Jsoup.connect(pageUrl).get();
            // All rows inside .table .t12 except those carrying the class bg5
            Elements rows = document.select(".table .t12").select("tr:not(.bg5)");

            // Row 0 is skipped; rows 1..5 are stored as day0..day4
            for (int i = 1; i < rows.size(); i++) {
                String txt = rows.get(i).text();
                System.out.println(txt);
                switch (i) {
                    case 1:
                        ws.setDay0(txt);
                        break;
                    case 2:
                        ws.setDay1(txt);
                        break;
                    case 3:
                        ws.setDay2(txt);
                        break;
                    case 4:
                        ws.setDay3(txt);
                        break;
                    case 5:
                        ws.setDay4(txt);
                        break;
                    default:
                        // further rows are printed above but not stored
                        break;
                }
            }

            weatherList.add(ws);

        } catch (IOException e) {
            System.err.println("Failed to crawl weather page: " + pageUrl);
            e.printStackTrace();
        }
    }

    /**
     * Loads a page and returns the anchors found inside {@code .table .t12}.
     *
     * @param pageUrl absolute URL of the page
     * @return the matched anchor elements, possibly empty
     * @throws IOException if the page cannot be fetched
     */
    private static Elements fetchLinks(String pageUrl) throws IOException {
        Document document = Jsoup.connect(pageUrl).get();
        return document.select(".table .t12").select("a");
    }

    /**
     * Tells whether the name is one of {@link #DIRECT_CITIES}, using exact equality.
     */
    private static boolean isDirectCity(String name) {
        for (String city : DIRECT_CITIES) {
            if (city.equals(name)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Sleeps for {@link #REQUEST_PAUSE_MILLIS}.
     *
     * @return false if the thread was interrupted while sleeping; the interrupt flag
     *         is restored in that case so callers further up can see it
     */
    private static boolean pause() {
        try {
            Thread.sleep(REQUEST_PAUSE_MILLIS);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Runs a full crawl starting from {@link #urlPro}.
     */
    public static void main(String[] args) {
        getProvince(urlPro);
    }
}
